{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 搭建倒排表\n",
    "倒排表的作用是让搜索更加快速，是搜索引擎中常用的技术。根据课程中所讲的方法，你需要完成这部分的代码。 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "import numpy as np\n",
    "import pickle\n",
    "from gensim.models import KeyedVectors  # 词向量用来比较俩俩之间相似度\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 读取数据： 导入在preprocessor.ipynb中生成的data/question_answer_pares.pkl文件，并将其保存在变量QApares中\n",
    "with open('data/question_answer_pares.pkl','rb') as f:\n",
    "    QApares = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['买', '二份', '有没有', '少点']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看数据\n",
    "print(type(QApares))\n",
    "#尝试增大递归限制\n",
    "#import sys\n",
    "#sys.setrecursionlimit(50000)\n",
    "QApares.question_after_preprocessing[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```TODO1``` 构造一个倒排表，不需要考虑单词的相似度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 构建一个倒排表，有关倒排表的详细内容参考实验手册\n",
    "# 为了能够快速检索，倒排表应用哈希表来存储。python中字典内部便是用哈希表来存储的，所以这里我们直接将倒排表保存在字典中\n",
    "# 注意：在这里不需要考虑单词之间的相似度。\n",
    "inverted_list = {}\n",
    "for index,sentence in enumerate(QApares.question_after_preprocessing):\n",
    "    ### 你需要完成的代码\n",
    "    for word in sentence:\n",
    "        ###判断单词是否在字典中：\n",
    "        if word not in inverted_list.keys():\n",
    "            inverted_list[word] = list([index])\n",
    "        ###判断文章id是否在列表中\n",
    "        if index not in inverted_list[word]:\n",
    "            inverted_list[word] = inverted_list[word]+list([index])\n",
    "            \n",
    "    ### 你需要完成的代码结束"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "#d ata/retrieve/sgns.zhihu.word是从https://github.com/Embedding/Chinese-Word-Vectors下载到的预训练好的中文词向量文件\n",
    "#使 用KeyedVectors.load_word2vec_format()函数加载预训练好的词向量文件\n",
    "model = KeyedVectors.load_word2vec_format('data/retrieve/sgns.zhihu.word')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_similar_by_word(word,topk):\n",
    "    '''\n",
    "        返回与一个单词word相似度最高的topk个单词所组成的单词列表\n",
    "        出参：\n",
    "            word_list：与word相似度最高的topk个单词所组成的单词列表。格式为[单词1，单词2，单词3，单词4，单词5]\n",
    "    '''\n",
    "    similar_words = model.similar_by_word(word,topk)\n",
    "    word_list = [word[0] for word in similar_words]\n",
    "    return word_list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```TODO2``` 构造一个新的倒排表，考虑单词之间的语义相似度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/3832 [00:00<?, ?it/s]/Applications/anaconda3/envs/py36/lib/python3.6/site-packages/gensim/models/keyedvectors.py:772: RuntimeWarning: invalid value encountered in true_divide\n",
      "  dists = dot(self.vectors[clip_start:clip_end], mean) / self.norms[clip_start:clip_end]\n",
      "100%|██████████| 3832/3832 [01:12<00:00, 53.03it/s]\n"
     ]
    }
   ],
   "source": [
    "# TODO：\n",
    "# 构造一个新的倒排表，并将结果保存在字典inverted_list_new中\n",
    "# 新的倒排表键为word，值为老倒排表[word]、老倒排表[单词1]、老倒排表[单词2]、老倒排表[单词3]、老倒排表[单词4]的并集\n",
    "# 即新倒排表保存了包含单词word或包含与单词word最相近的5个单词中的某一个的问题的index\n",
    "inverted_list_new = {}\n",
    "for word in tqdm(inverted_list):\n",
    "    ### 你需要完成的部分\n",
    "    try:\n",
    "        x_list=inverted_list[word]\n",
    "        k=4\n",
    "        k_list=get_similar_by_word(word=word,topk=k)\n",
    "        for i0 in range(0,k):\n",
    "            x_list = x_list+inverted_list[k_list[i0]]\n",
    "        ###文档id去重\n",
    "        inverted_list_new[word]=list(set(x_list))\n",
    "    except:\n",
    "        inverted_list_new[word] = inverted_list[word]\n",
    "    ### 你需要完成的代码结束\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将新的倒排表保存在文件data/retrieve/invertedList.pkl中\n",
    "with open('data/retrieve/invertedList.pkl','wb') as f:\n",
    "    pickle.dump(inverted_list_new,f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以下为测试，完成上述过程之后，可以运行以下的代码来测试准确性。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "#这一格的内容是从preprocessor.ipynb中粘贴而来，包含了数据预处理的几个关键函数\n",
    "import emoji\n",
    "import re\n",
    "import jieba\n",
    "def clean(content):\n",
    "    content = emoji.demojize(content)\n",
    "    content = re.sub('<.*>','',content)\n",
    "    return content\n",
    "#这一函数是用于对句子进行分词，在preprocessor.ipynb中由于数据是已经分好词的，所以我们并没有进行这一步骤，但是对于一个新的问句，这一步是必不可少的\n",
    "def question_cut(content):\n",
    "    return list(jieba.cut(content))\n",
    "def strip(wordList):\n",
    "    return [word.strip() for word in wordList if word.strip()!='']\n",
    "with open(\"data/stopWord.json\",\"r\") as f:\n",
    "    stopWords = f.read().split(\"\\n\")\n",
    "def rm_stop_word(wordList):\n",
    "    return [word for word in wordList if word not in stopWords]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 从data/retrieve/invertedList.pkl加载倒排表并将其保存在变量invertedList中\n",
    "with open('data/retrieve/invertedList.pkl','rb') as f:\n",
    "    invertedList = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_retrieve_result(sentence):\n",
    "    '''\n",
    "        输入一个句子sentence，根据倒排表进行快速检索，返回与该句子较相近的一些候选问题的index\n",
    "        候选问题由包含该句子中任一单词或包含与该句子中任一单词意思相近的单词的问题索引组成\n",
    "    '''\n",
    "    sentence = clean(sentence)\n",
    "    sentence = question_cut(sentence)\n",
    "    sentence = strip(sentence)\n",
    "    sentence = rm_stop_word(sentence)\n",
    "    candidate = set()\n",
    "    for word in sentence:\n",
    "        if word in invertedList:\n",
    "            candidate = candidate | set(invertedList[word])#需要变成set类型才能运算\n",
    "    return candidate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{81920,\n",
       " 16386,\n",
       " 5,\n",
       " 65541,\n",
       " 81927,\n",
       " 32776,\n",
       " 81930,\n",
       " 81935,\n",
       " 17,\n",
       " 18,\n",
       " 65554,\n",
       " 16401,\n",
       " 81947,\n",
       " 29,\n",
       " 65566,\n",
       " 32800,\n",
       " 81953,\n",
       " 32803,\n",
       " 98339,\n",
       " 81959,\n",
       " 32810,\n",
       " 98346,\n",
       " 49194,\n",
       " 32818,\n",
       " 55,\n",
       " 49209,\n",
       " 98366,\n",
       " 64,\n",
       " 49219,\n",
       " 65604,\n",
       " 81988,\n",
       " 16458,\n",
       " 65611,\n",
       " 81995,\n",
       " 81998,\n",
       " 16463,\n",
       " 16464,\n",
       " 49233,\n",
       " 32850,\n",
       " 98387,\n",
       " 49234,\n",
       " 16475,\n",
       " 49245,\n",
       " 98398,\n",
       " 65631,\n",
       " 82015,\n",
       " 102,\n",
       " 65639,\n",
       " 65640,\n",
       " 49259,\n",
       " 65646,\n",
       " 98415,\n",
       " 98416,\n",
       " 49263,\n",
       " 49267,\n",
       " 16500,\n",
       " 82035,\n",
       " 118,\n",
       " 16503,\n",
       " 122,\n",
       " 65659,\n",
       " 49275,\n",
       " 125,\n",
       " 32894,\n",
       " 133,\n",
       " 65669,\n",
       " 65670,\n",
       " 65671,\n",
       " 49293,\n",
       " 142,\n",
       " 65679,\n",
       " 32912,\n",
       " 98451,\n",
       " 150,\n",
       " 151,\n",
       " 32929,\n",
       " 49318,\n",
       " 49320,\n",
       " 65708,\n",
       " 82092,\n",
       " 82093,\n",
       " 98484,\n",
       " 98489,\n",
       " 187,\n",
       " 49340,\n",
       " 32957,\n",
       " 200,\n",
       " 16588,\n",
       " 32973,\n",
       " 65742,\n",
       " 98518,\n",
       " 16598,\n",
       " 49366,\n",
       " 65755,\n",
       " 220,\n",
       " 223,\n",
       " 65764,\n",
       " 82149,\n",
       " 82155,\n",
       " 16621,\n",
       " 49396,\n",
       " 65783,\n",
       " 33017,\n",
       " 65786,\n",
       " 254,\n",
       " 65790,\n",
       " 82176,\n",
       " 261,\n",
       " 65798,\n",
       " 65810,\n",
       " 275,\n",
       " 98586,\n",
       " 49442,\n",
       " 65833,\n",
       " 33068,\n",
       " 82220,\n",
       " 65838,\n",
       " 82221,\n",
       " 82224,\n",
       " 33073,\n",
       " 65843,\n",
       " 65844,\n",
       " 16691,\n",
       " 310,\n",
       " 16696,\n",
       " 82234,\n",
       " 65852,\n",
       " 49468,\n",
       " 318,\n",
       " 49471,\n",
       " 49472,\n",
       " 16706,\n",
       " 49475,\n",
       " 65862,\n",
       " 65863,\n",
       " 16712,\n",
       " 82248,\n",
       " 49483,\n",
       " 49493,\n",
       " 16726,\n",
       " 344,\n",
       " 16730,\n",
       " 65883,\n",
       " 82268,\n",
       " 65885,\n",
       " 350,\n",
       " 33120,\n",
       " 16745,\n",
       " 364,\n",
       " 98668,\n",
       " 65903,\n",
       " 33140,\n",
       " 98678,\n",
       " 65913,\n",
       " 33149,\n",
       " 49535,\n",
       " 33158,\n",
       " 49544,\n",
       " 82313,\n",
       " 33163,\n",
       " 16783,\n",
       " 33168,\n",
       " 401,\n",
       " 98709,\n",
       " 49558,\n",
       " 98715,\n",
       " 16796,\n",
       " 49565,\n",
       " 82333,\n",
       " 82334,\n",
       " 33189,\n",
       " 33191,\n",
       " 65960,\n",
       " 33193,\n",
       " 49579,\n",
       " 16812,\n",
       " 98740,\n",
       " 16822,\n",
       " 82359,\n",
       " 33210,\n",
       " 16827,\n",
       " 82365,\n",
       " 98751,\n",
       " 16832,\n",
       " 98754,\n",
       " 33220,\n",
       " 453,\n",
       " 49604,\n",
       " 98763,\n",
       " 461,\n",
       " 469,\n",
       " 49623,\n",
       " 16856,\n",
       " 33244,\n",
       " 49629,\n",
       " 16867,\n",
       " 16869,\n",
       " 98794,\n",
       " 82410,\n",
       " 82412,\n",
       " 495,\n",
       " 98803,\n",
       " 49651,\n",
       " 49656,\n",
       " 33273,\n",
       " 16889,\n",
       " 49658,\n",
       " 33276,\n",
       " 82426,\n",
       " 66046,\n",
       " 49669,\n",
       " 82437,\n",
       " 33290,\n",
       " 49674,\n",
       " 16911,\n",
       " 530,\n",
       " 33300,\n",
       " 66069,\n",
       " 16918,\n",
       " 66077,\n",
       " 542,\n",
       " 543,\n",
       " 82463,\n",
       " 66081,\n",
       " 16932,\n",
       " 66091,\n",
       " 556,\n",
       " 66093,\n",
       " 66094,\n",
       " 98860,\n",
       " 82475,\n",
       " 66097,\n",
       " 49709,\n",
       " 16942,\n",
       " 49715,\n",
       " 49716,\n",
       " 98877,\n",
       " 66111,\n",
       " 33346,\n",
       " 579,\n",
       " 33347,\n",
       " 82499,\n",
       " 49735,\n",
       " 49739,\n",
       " 588,\n",
       " 16983,\n",
       " 16991,\n",
       " 82528,\n",
       " 49761,\n",
       " 49762,\n",
       " 66152,\n",
       " 66153,\n",
       " 17003,\n",
       " 49777,\n",
       " 98932,\n",
       " 17012,\n",
       " 66166,\n",
       " 17020,\n",
       " 17021,\n",
       " 639,\n",
       " 640,\n",
       " 49793,\n",
       " 642,\n",
       " 17026,\n",
       " 98948,\n",
       " 17027,\n",
       " 49796,\n",
       " 82563,\n",
       " 17030,\n",
       " 66185,\n",
       " 82566,\n",
       " 651,\n",
       " 33422,\n",
       " 82576,\n",
       " 98962,\n",
       " 33427,\n",
       " 17043,\n",
       " 49812,\n",
       " 82584,\n",
       " 98970,\n",
       " 17050,\n",
       " 17052,\n",
       " 66207,\n",
       " 82592,\n",
       " 673,\n",
       " 33441,\n",
       " 17057,\n",
       " 17061,\n",
       " 33446,\n",
       " 49831,\n",
       " 66221,\n",
       " 82605,\n",
       " 687,\n",
       " 66224,\n",
       " 49841,\n",
       " 33459,\n",
       " 17076,\n",
       " 694,\n",
       " 33464,\n",
       " 33466,\n",
       " 33468,\n",
       " 99005,\n",
       " 66238,\n",
       " 99007,\n",
       " 82628,\n",
       " 710,\n",
       " 82630,\n",
       " 82632,\n",
       " 99017,\n",
       " 718,\n",
       " 82639,\n",
       " 49872,\n",
       " 82645,\n",
       " 82652,\n",
       " 49889,\n",
       " 82658,\n",
       " 99044,\n",
       " 743,\n",
       " 17127,\n",
       " 33513,\n",
       " 49897,\n",
       " 17132,\n",
       " 749,\n",
       " 49901,\n",
       " 49903,\n",
       " 82674,\n",
       " 33523,\n",
       " 17142,\n",
       " 759,\n",
       " 17144,\n",
       " 66299,\n",
       " 99068,\n",
       " 33535,\n",
       " 17152,\n",
       " 769,\n",
       " 33539,\n",
       " 66307,\n",
       " 66309,\n",
       " 82694,\n",
       " 17162,\n",
       " 99084,\n",
       " 49932,\n",
       " 17167,\n",
       " 49935,\n",
       " 66321,\n",
       " 99090,\n",
       " 66323,\n",
       " 82713,\n",
       " 99098,\n",
       " 66334,\n",
       " 82719,\n",
       " 800,\n",
       " 66337,\n",
       " 17185,\n",
       " 49954,\n",
       " 17190,\n",
       " 33576,\n",
       " 49962,\n",
       " 17196,\n",
       " 99120,\n",
       " 99122,\n",
       " 49972,\n",
       " 49974,\n",
       " 17207,\n",
       " 33592,\n",
       " 33593,\n",
       " 33600,\n",
       " 33604,\n",
       " 99140,\n",
       " 82757,\n",
       " 841,\n",
       " 49994,\n",
       " 845,\n",
       " 99149,\n",
       " 17235,\n",
       " 82771,\n",
       " 66389,\n",
       " 854,\n",
       " 82775,\n",
       " 33624,\n",
       " 33625,\n",
       " 858,\n",
       " 66395,\n",
       " 99163,\n",
       " 33629,\n",
       " 17245,\n",
       " 82782,\n",
       " 17249,\n",
       " 17251,\n",
       " 82787,\n",
       " 99173,\n",
       " 33638,\n",
       " 17255,\n",
       " 874,\n",
       " 66411,\n",
       " 82794,\n",
       " 17259,\n",
       " 878,\n",
       " 33647,\n",
       " 66415,\n",
       " 66417,\n",
       " 33650,\n",
       " 66418,\n",
       " 99182,\n",
       " 82798,\n",
       " 891,\n",
       " 17275,\n",
       " 82812,\n",
       " 33662,\n",
       " 17283,\n",
       " 66436,\n",
       " 901,\n",
       " 99208,\n",
       " 66441,\n",
       " 33674,\n",
       " 33675,\n",
       " 17291,\n",
       " 82829,\n",
       " 33682,\n",
       " 916,\n",
       " 66452,\n",
       " 17304,\n",
       " 33691,\n",
       " 33692,\n",
       " 66461,\n",
       " 66463,\n",
       " 82847,\n",
       " 33697,\n",
       " 99234,\n",
       " 931,\n",
       " 82848,\n",
       " 17317,\n",
       " 938,\n",
       " 17324,\n",
       " 82863,\n",
       " 944,\n",
       " 66480,\n",
       " 33716,\n",
       " 99254,\n",
       " 82878,\n",
       " 959,\n",
       " 33728,\n",
       " 99267,\n",
       " 33736,\n",
       " 33741,\n",
       " 17359,\n",
       " 978,\n",
       " 17364,\n",
       " 33750,\n",
       " 82905,\n",
       " 992,\n",
       " 17378,\n",
       " 33765,\n",
       " 99301,\n",
       " 1000,\n",
       " 50152,\n",
       " 1005,\n",
       " 99309,\n",
       " 50158,\n",
       " 82928,\n",
       " 33780,\n",
       " 99318,\n",
       " 1017,\n",
       " 33787,\n",
       " 66557,\n",
       " 1024,\n",
       " 17408,\n",
       " 17409,\n",
       " 1027,\n",
       " 50179,\n",
       " 82949,\n",
       " 82950,\n",
       " 1034,\n",
       " 50187,\n",
       " 1036,\n",
       " 50191,\n",
       " 66577,\n",
       " 66580,\n",
       " 99351,\n",
       " 82969,\n",
       " 82970,\n",
       " 17437,\n",
       " 99360,\n",
       " 1057,\n",
       " 33825,\n",
       " 99363,\n",
       " 82977,\n",
       " 82979,\n",
       " 33831,\n",
       " 66600,\n",
       " 17447,\n",
       " 33837,\n",
       " 50221,\n",
       " 66607,\n",
       " 99376,\n",
       " 33841,\n",
       " 82993,\n",
       " 99380,\n",
       " 66613,\n",
       " 82996,\n",
       " 82998,\n",
       " 33849,\n",
       " 83003,\n",
       " 66622,\n",
       " 1087,\n",
       " 1088,\n",
       " 99391,\n",
       " 17473,\n",
       " 83011,\n",
       " 50245,\n",
       " 1095,\n",
       " 1098,\n",
       " 66637,\n",
       " 1102,\n",
       " 99405,\n",
       " 50256,\n",
       " 99410,\n",
       " 99413,\n",
       " 66646,\n",
       " 99419,\n",
       " 66652,\n",
       " 83037,\n",
       " 1118,\n",
       " 83040,\n",
       " 99431,\n",
       " 1129,\n",
       " 17516,\n",
       " 83052,\n",
       " 66670,\n",
       " 50290,\n",
       " 17524,\n",
       " 33912,\n",
       " 83065,\n",
       " 33914,\n",
       " 66685,\n",
       " 83071,\n",
       " 17537,\n",
       " 50309,\n",
       " 33927,\n",
       " 99467,\n",
       " 99470,\n",
       " 50318,\n",
       " 99472,\n",
       " 66705,\n",
       " 83088,\n",
       " 66708,\n",
       " 66709,\n",
       " 50327,\n",
       " 99484,\n",
       " 99485,\n",
       " 83101,\n",
       " 99487,\n",
       " 66721,\n",
       " 33954,\n",
       " 1187,\n",
       " 83105,\n",
       " 50337,\n",
       " 50341,\n",
       " 99495,\n",
       " 66728,\n",
       " 50345,\n",
       " 50346,\n",
       " 66732,\n",
       " 83116,\n",
       " 66734,\n",
       " 50351,\n",
       " 50356,\n",
       " 99510,\n",
       " 17591,\n",
       " 50363,\n",
       " 99517,\n",
       " 66751,\n",
       " 17599,\n",
       " 1223,\n",
       " 33992,\n",
       " 99528,\n",
       " 17607,\n",
       " 50376,\n",
       " 17616,\n",
       " 99537,\n",
       " 66770,\n",
       " 99539,\n",
       " 1237,\n",
       " 1242,\n",
       " 66779,\n",
       " 66780,\n",
       " 17628,\n",
       " 1247,\n",
       " 17631,\n",
       " 83168,\n",
       " 50402,\n",
       " 99555,\n",
       " 50405,\n",
       " 34024,\n",
       " 50409,\n",
       " 50411,\n",
       " 1265,\n",
       " 34034,\n",
       " 83186,\n",
       " 17651,\n",
       " 50419,\n",
       " 1270,\n",
       " 34042,\n",
       " 1277,\n",
       " 66814,\n",
       " 99583,\n",
       " 17671,\n",
       " 17677,\n",
       " 34064,\n",
       " 1297,\n",
       " 66834,\n",
       " 50448,\n",
       " 50449,\n",
       " 50456,\n",
       " 66844,\n",
       " 83229,\n",
       " 83231,\n",
       " 1312,\n",
       " 66849,\n",
       " 50464,\n",
       " 17698,\n",
       " 17699,\n",
       " 83236,\n",
       " 17702,\n",
       " 1326,\n",
       " 17713,\n",
       " 66866,\n",
       " 34099,\n",
       " 99637,\n",
       " 1334,\n",
       " 66871,\n",
       " 83258,\n",
       " 66877,\n",
       " 1342,\n",
       " 50493,\n",
       " 1345,\n",
       " 99651,\n",
       " 66887,\n",
       " 1352,\n",
       " 50503,\n",
       " 1354,\n",
       " 17737,\n",
       " 50508,\n",
       " 66895,\n",
       " 99671,\n",
       " 66909,\n",
       " 99681,\n",
       " 83300,\n",
       " 17770,\n",
       " 1392,\n",
       " 66929,\n",
       " 34162,\n",
       " 17777,\n",
       " 1401,\n",
       " 1402,\n",
       " 34169,\n",
       " 50554,\n",
       " 99709,\n",
       " 50557,\n",
       " 1407,\n",
       " 66944,\n",
       " 66945,\n",
       " 1412,\n",
       " 1415,\n",
       " 34184,\n",
       " 1420,\n",
       " 99729,\n",
       " 1426,\n",
       " 34196,\n",
       " 83351,\n",
       " 66971,\n",
       " 1436,\n",
       " 83357,\n",
       " 66974,\n",
       " 1443,\n",
       " 83363,\n",
       " 50598,\n",
       " 34217,\n",
       " 66986,\n",
       " 83369,\n",
       " 1452,\n",
       " 66993,\n",
       " 50609,\n",
       " 1459,\n",
       " 34227,\n",
       " 66996,\n",
       " 83385,\n",
       " 50618,\n",
       " 50626,\n",
       " 50627,\n",
       " 83406,\n",
       " 83407,\n",
       " 34258,\n",
       " 50643,\n",
       " 83412,\n",
       " 99797,\n",
       " 50647,\n",
       " 99803,\n",
       " 50652,\n",
       " 50654,\n",
       " 1508,\n",
       " 50661,\n",
       " 99814,\n",
       " 1512,\n",
       " 1515,\n",
       " 67053,\n",
       " 99821,\n",
       " 17901,\n",
       " 99824,\n",
       " 99825,\n",
       " 83438,\n",
       " 50672,\n",
       " 83441,\n",
       " 50677,\n",
       " 17910,\n",
       " 83446,\n",
       " 83447,\n",
       " 83448,\n",
       " 17916,\n",
       " 17918,\n",
       " 34303,\n",
       " 67071,\n",
       " 34307,\n",
       " 83460,\n",
       " 67077,\n",
       " 67079,\n",
       " 99854,\n",
       " 83473,\n",
       " 67092,\n",
       " 67094,\n",
       " 99862,\n",
       " 1565,\n",
       " 34333,\n",
       " 1567,\n",
       " 34338,\n",
       " 1579,\n",
       " 83502,\n",
       " 83504,\n",
       " 1585,\n",
       " 67127,\n",
       " 99897,\n",
       " 50753,\n",
       " 83521,\n",
       " 1603,\n",
       " 1604,\n",
       " 17991,\n",
       " 17992,\n",
       " 50760,\n",
       " 83527,\n",
       " 67147,\n",
       " 17996,\n",
       " 67152,\n",
       " 34388,\n",
       " 67156,\n",
       " 99924,\n",
       " 1623,\n",
       " 34395,\n",
       " 99931,\n",
       " 18016,\n",
       " 1635,\n",
       " 99946,\n",
       " 67180,\n",
       " 99949,\n",
       " 99954,\n",
       " 34419,\n",
       " 83571,\n",
       " 99957,\n",
       " 83573,\n",
       " 18043,\n",
       " 50811,\n",
       " 67197,\n",
       " 34434,\n",
       " 99970,\n",
       " 18053,\n",
       " 83590,\n",
       " 34440,\n",
       " 1673,\n",
       " 50826,\n",
       " 1675,\n",
       " 1676,\n",
       " 34443,\n",
       " 67212,\n",
       " 67216,\n",
       " 18065,\n",
       " 18066,\n",
       " 18068,\n",
       " 18069,\n",
       " 50838,\n",
       " 50839,\n",
       " 34457,\n",
       " 50841,\n",
       " 1691,\n",
       " 1692,\n",
       " 83615,\n",
       " 1697,\n",
       " 50855,\n",
       " 34473,\n",
       " 67241,\n",
       " 50861,\n",
       " 34480,\n",
       " 67250,\n",
       " 18104,\n",
       " 18108,\n",
       " 83645,\n",
       " 18112,\n",
       " 18116,\n",
       " 67269,\n",
       " 1743,\n",
       " 83667,\n",
       " 67284,\n",
       " 50904,\n",
       " 83674,\n",
       " 50910,\n",
       " 34528,\n",
       " 18146,\n",
       " 50917,\n",
       " 83688,\n",
       " 50923,\n",
       " 1775,\n",
       " 18160,\n",
       " 18168,\n",
       " 34553,\n",
       " 67323,\n",
       " 18171,\n",
       " 50939,\n",
       " 50944,\n",
       " 1798,\n",
       " 83718,\n",
       " 34572,\n",
       " 1805,\n",
       " 83724,\n",
       " 34576,\n",
       " 67344,\n",
       " 83730,\n",
       " 67350,\n",
       " 1820,\n",
       " 1821,\n",
       " 34590,\n",
       " 50972,\n",
       " 1825,\n",
       " 50986,\n",
       " 50988,\n",
       " 34606,\n",
       " 1839,\n",
       " 67375,\n",
       " 50990,\n",
       " 50993,\n",
       " 67379,\n",
       " 83767,\n",
       " 51000,\n",
       " 1852,\n",
       " 83774,\n",
       " 1861,\n",
       " 18245,\n",
       " 1866,\n",
       " 67402,\n",
       " 1874,\n",
       " 83795,\n",
       " 83799,\n",
       " 67416,\n",
       " 18270,\n",
       " 51039,\n",
       " 83807,\n",
       " 83810,\n",
       " 51043,\n",
       " 51046,\n",
       " 67431,\n",
       " 51047,\n",
       " 1904,\n",
       " 67441,\n",
       " 18289,\n",
       " 34687,\n",
       " 1920,\n",
       " 34689,\n",
       " 51071,\n",
       " 51074,\n",
       " 83846,\n",
       " 18314,\n",
       " 51084,\n",
       " 34701,\n",
       " 18319,\n",
       " 34708,\n",
       " 34710,\n",
       " 18326,\n",
       " 51095,\n",
       " 67492,\n",
       " 67493,\n",
       " 51108,\n",
       " 18348,\n",
       " 83887,\n",
       " 1970,\n",
       " 83895,\n",
       " 34744,\n",
       " 51128,\n",
       " 83898,\n",
       " 83902,\n",
       " 34753,\n",
       " 83906,\n",
       " 51140,\n",
       " 18373,\n",
       " 67531,\n",
       " 18384,\n",
       " 2004,\n",
       " 51159,\n",
       " 2008,\n",
       " 2009,\n",
       " 18397,\n",
       " 18401,\n",
       " 83940,\n",
       " 2023,\n",
       " 67560,\n",
       " 34793,\n",
       " 67562,\n",
       " 34795,\n",
       " 51175,\n",
       " 83951,\n",
       " 51184,\n",
       " 83953,\n",
       " 2037,\n",
       " 34807,\n",
       " 18424,\n",
       " 51196,\n",
       " 51197,\n",
       " 2046,\n",
       " 51198,\n",
       " 18431,\n",
       " 34819,\n",
       " 2052,\n",
       " 18442,\n",
       " 34827,\n",
       " 18449,\n",
       " 2068,\n",
       " 18454,\n",
       " 67613,\n",
       " 83999,\n",
       " 34848,\n",
       " 67626,\n",
       " 51242,\n",
       " 67628,\n",
       " 34864,\n",
       " 2099,\n",
       " 67636,\n",
       " 34869,\n",
       " 84020,\n",
       " 51255,\n",
       " 51256,\n",
       " 67641,\n",
       " 2110,\n",
       " 18495,\n",
       " 51264,\n",
       " 67649,\n",
       " 2117,\n",
       " 67655,\n",
       " 51275,\n",
       " 51276,\n",
       " 51277,\n",
       " 67666,\n",
       " 51282,\n",
       " 67669,\n",
       " 18518,\n",
       " 67680,\n",
       " 18528,\n",
       " 34914,\n",
       " 67682,\n",
       " 51300,\n",
       " 2151,\n",
       " 84072,\n",
       " 34926,\n",
       " 2160,\n",
       " 2161,\n",
       " 34929,\n",
       " 67699,\n",
       " 18549,\n",
       " 51319,\n",
       " 2170,\n",
       " 51325,\n",
       " 51326,\n",
       " 18561,\n",
       " 2178,\n",
       " 84097,\n",
       " 34949,\n",
       " 67718,\n",
       " 18568,\n",
       " 2187,\n",
       " 84107,\n",
       " 51341,\n",
       " 2193,\n",
       " 67730,\n",
       " 18579,\n",
       " 84117,\n",
       " 67735,\n",
       " 51354,\n",
       " 84132,\n",
       " 2216,\n",
       " 2218,\n",
       " 51374,\n",
       " 51376,\n",
       " 84144,\n",
       " 18612,\n",
       " 18613,\n",
       " ...}"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_retrieve_result('什么时候发货')  # 通过倒排表返回文档IDs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
